library(car)
library(mosaic)
library(DT)
library(tidyverse)
files <- dir()
rdat <- read.csv(grep(".csv", files, value=TRUE), header=TRUE)
as.tibble(rdat)
# A tibble: 155 x 11
Y X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
<dbl> <dbl> <int> <int> <int> <int> <int> <dbl> <dbl> <dbl> <dbl>
1 -6.11 2.17 1 1 0 1 1 3.00 6.72 42.2 0.223
2 -5.66 2.17 1 1 1 0 0 0.799 8.38 30.0 0.337
3 -0.248 -0.333 0 0 1 0 1 2.69 25.1 2.34 0.429
4 -11.2 -0.857 1 0 1 1 1 1.31 11.7 6.81 0.626
5 -10.6 1.03 1 1 1 0 1 2.64 26.5 32.1 1.26
6 -2.81 1.19 1 0 1 0 1 3.15 32.4 28.5 0.422
7 -1.86 1.38 0 0 1 0 0 1.50 10.5 8.99 1.30
8 -17.8 -0.990 1 1 1 1 1 2.02 21.2 18.6 1.01
9 -8.57 -0.875 0 1 1 0 0 1.16 10.4 10.7 1.14
10 2.13 1.09 1 0 0 0 1 0.0908 1.47 27.3 0.0564
# ... with 145 more rows
pairs(rdat, pch=16, cex=1, panel=panel.smooth, col=rgb(.8,.8,.8))
lm1 <- lm(Y ~ X4 + X5, data=rdat)
summary(lm1)
##
## Call:
## lm(formula = Y ~ X4 + X5, data = rdat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.6441 -2.5074 0.0913 2.1465 13.7465
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.2437 0.6044 3.712 0.000288 ***
## X4 -5.9592 0.6645 -8.968 1.04e-15 ***
## X5 -5.4109 0.6664 -8.119 1.49e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.11 on 152 degrees of freedom
## Multiple R-squared: 0.5059, Adjusted R-squared: 0.4994
## F-statistic: 77.81 on 2 and 152 DF, p-value: < 2.2e-16
palette(c("skyblue","orange","green","purple"))
pairs(cbind(R=lm1$res, Fit=lm1$fit, rdat), pch=16, cex=1, panel=panel.smooth, col=interaction(rdat$X4,rdat$X5))
I also worry about how tightly X7 and X8 are linked to the X4 and X5 variables.
lm2 <- lm(Y ~ X3 + X4 + X5, data=rdat)
summary(lm2)
##
## Call:
## lm(formula = Y ~ X3 + X4 + X5, data = rdat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.0816 -1.9313 -0.4369 1.4425 15.3090
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.0526 0.6632 6.110 8.08e-09 ***
## X3 -3.1322 0.6148 -5.095 1.03e-06 ***
## X4 -6.1623 0.6172 -9.985 < 2e-16 ***
## X5 -5.4469 0.6177 -8.818 2.63e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.809 on 151 degrees of freedom
## Multiple R-squared: 0.5784, Adjusted R-squared: 0.57
## F-statistic: 69.04 on 3 and 151 DF, p-value: < 2.2e-16
palette(c("skyblue","orange","green","purple","blue","red","green4","black"))
pairs(cbind(R=lm2$res, Fit=lm2$fit, rdat), pch=16, cex=1, panel=panel.smooth, col=interaction(rdat$X4,rdat$X5,rdat$X3))
plot(Y ~ X10, data=rdat, col=interaction(X3,X4,X5))
lm3 <- lm(Y ~ X2 + X3 + X4 + X5, data=rdat)
summary(lm3)
##
## Call:
## lm(formula = Y ~ X2 + X3 + X4 + X5, data = rdat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.4498 -1.9356 -0.5587 1.2835 15.1969
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.7692 0.7193 6.630 5.69e-10 ***
## X2 -1.4467 0.6081 -2.379 0.0186 *
## X3 -3.2842 0.6089 -5.394 2.63e-07 ***
## X4 -6.0713 0.6090 -9.968 < 2e-16 ***
## X5 -5.3872 0.6089 -8.848 2.30e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.751 on 150 degrees of freedom
## Multiple R-squared: 0.5937, Adjusted R-squared: 0.5829
## F-statistic: 54.8 on 4 and 150 DF, p-value: < 2.2e-16
palette(c("skyblue","orange","green","purple","blue","red","green4","black"))
pairs(cbind(R=lm3$res, Fit=lm3$fit, rdat), pch=16, cex=1, panel=panel.smooth, col=interaction(rdat$X4,rdat$X5,rdat$X3,rdat$X2))
plot(Y ~ X10, data=rdat, col=interaction(X2,X3,X4,X5))
lm4 <- lm(Y ~ X2 + X3 + X4 + X5 + X10 + X10:X2 + X10:X3 + X10:X4 + X10:X5 + I(X10^2) + I(X10^2):X2 + I(X10^2):X3 + I(X10^2):X4 + I(X10^2):X5, data=rdat)
summary(lm4)
##
## Call:
## lm(formula = Y ~ X2 + X3 + X4 + X5 + X10 + X10:X2 + X10:X3 +
## X10:X4 + X10:X5 + I(X10^2) + I(X10^2):X2 + I(X10^2):X3 +
## I(X10^2):X4 + I(X10^2):X5, data = rdat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.314895 -0.056741 -0.003001 0.062851 0.217688
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.00720 0.03052 131.28 <2e-16 ***
## X2 -1.99237 0.02563 -77.73 <2e-16 ***
## X3 -0.85955 0.02671 -32.18 <2e-16 ***
## X4 -2.99079 0.02806 -106.57 <2e-16 ***
## X5 -5.13591 0.02786 -184.37 <2e-16 ***
## X10 5.12426 0.09374 54.67 <2e-16 ***
## I(X10^2) -1.12782 0.07143 -15.79 <2e-16 ***
## X2:X10 -2.17083 0.07727 -28.09 <2e-16 ***
## X3:X10 -6.70508 0.08239 -81.38 <2e-16 ***
## X4:X10 -8.54898 0.08566 -99.80 <2e-16 ***
## X5:X10 -7.37333 0.08453 -87.23 <2e-16 ***
## X2:I(X10^2) 2.12915 0.06090 34.96 <2e-16 ***
## X3:I(X10^2) 1.08973 0.06354 17.15 <2e-16 ***
## X4:I(X10^2) 2.05988 0.06527 31.56 <2e-16 ***
## X5:I(X10^2) 4.66846 0.06412 72.81 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1061 on 140 degrees of freedom
## Multiple R-squared: 0.9997, Adjusted R-squared: 0.9997
## F-statistic: 3.295e+04 on 14 and 140 DF, p-value: < 2.2e-16
plot(Y ~ X10, data=rdat, col=interaction(X2,X3,X4,X5))
points(lm4$fitted ~ X10, data=rdat, col=interaction(X2,X3,X4,X5), cex=0.5, pch=16)
Looks like we got it! Now to draw all the curves. (Notice all of the fitted value almost exactly on the center of each dot in the plot above.)
plot(Y ~ X10, data=rdat, col=interaction(X2,X3,X4,X5))
b <- coef(lm4)
X2 = 0; X3 = 0; X4 = 0; X5 = 0;
curve(b[1] + b[2]*X2 + b[3]*X3 + b[4]*X4 + b[5]*X5 + b[6]*x + b[7]*x^2 + b[8]*x*X2 + b[9]*x*X3 + b[10]*x*X4 + b[11]*x*X5 + b[12]*x^2*X2 + b[13]*x^2*X3 + b[14]*x^2*X4 + b[15]*x^2*X5, add=TRUE, col=palette()[1])
X2 = 1; X3 = 0; X4 = 0; X5 = 0;
curve(b[1] + b[2]*X2 + b[3]*X3 + b[4]*X4 + b[5]*X5 + b[6]*x + b[7]*x^2 + b[8]*x*X2 + b[9]*x*X3 + b[10]*x*X4 + b[11]*x*X5 + b[12]*x^2*X2 + b[13]*x^2*X3 + b[14]*x^2*X4 + b[15]*x^2*X5, add=TRUE, col=palette()[2])
X2 = 0; X3 = 1; X4 = 0; X5 = 0;
curve(b[1] + b[2]*X2 + b[3]*X3 + b[4]*X4 + b[5]*X5 + b[6]*x + b[7]*x^2 + b[8]*x*X2 + b[9]*x*X3 + b[10]*x*X4 + b[11]*x*X5 + b[12]*x^2*X2 + b[13]*x^2*X3 + b[14]*x^2*X4 + b[15]*x^2*X5, add=TRUE, col=palette()[3])
X2 = 0; X3 = 0; X4 = 1; X5 = 0;
curve(b[1] + b[2]*X2 + b[3]*X3 + b[4]*X4 + b[5]*X5 + b[6]*x + b[7]*x^2 + b[8]*x*X2 + b[9]*x*X3 + b[10]*x*X4 + b[11]*x*X5 + b[12]*x^2*X2 + b[13]*x^2*X3 + b[14]*x^2*X4 + b[15]*x^2*X5, add=TRUE, col=palette()[5])
X2 = 0; X3 = 0; X4 = 0; X5 = 1;
curve(b[1] + b[2]*X2 + b[3]*X3 + b[4]*X4 + b[5]*X5 + b[6]*x + b[7]*x^2 + b[8]*x*X2 + b[9]*x*X3 + b[10]*x*X4 + b[11]*x*X5 + b[12]*x^2*X2 + b[13]*x^2*X3 + b[14]*x^2*X4 + b[15]*x^2*X5, add=TRUE, col=palette()[1])
X2 = 1; X3 = 1; X4 = 0; X5 = 0;
curve(b[1] + b[2]*X2 + b[3]*X3 + b[4]*X4 + b[5]*X5 + b[6]*x + b[7]*x^2 + b[8]*x*X2 + b[9]*x*X3 + b[10]*x*X4 + b[11]*x*X5 + b[12]*x^2*X2 + b[13]*x^2*X3 + b[14]*x^2*X4 + b[15]*x^2*X5, add=TRUE, col=palette()[4])
X2 = 1; X3 = 0; X4 = 1; X5 = 0;
curve(b[1] + b[2]*X2 + b[3]*X3 + b[4]*X4 + b[5]*X5 + b[6]*x + b[7]*x^2 + b[8]*x*X2 + b[9]*x*X3 + b[10]*x*X4 + b[11]*x*X5 + b[12]*x^2*X2 + b[13]*x^2*X3 + b[14]*x^2*X4 + b[15]*x^2*X5, add=TRUE, col=palette()[6])
X2 = 1; X3 = 0; X4 = 0; X5 = 1;
curve(b[1] + b[2]*X2 + b[3]*X3 + b[4]*X4 + b[5]*X5 + b[6]*x + b[7]*x^2 + b[8]*x*X2 + b[9]*x*X3 + b[10]*x*X4 + b[11]*x*X5 + b[12]*x^2*X2 + b[13]*x^2*X3 + b[14]*x^2*X4 + b[15]*x^2*X5, add=TRUE, col=palette()[2])
X2 = 0; X3 = 1; X4 = 0; X5 = 1;
curve(b[1] + b[2]*X2 + b[3]*X3 + b[4]*X4 + b[5]*X5 + b[6]*x + b[7]*x^2 + b[8]*x*X2 + b[9]*x*X3 + b[10]*x*X4 + b[11]*x*X5 + b[12]*x^2*X2 + b[13]*x^2*X3 + b[14]*x^2*X4 + b[15]*x^2*X5, add=TRUE, col=palette()[3])
X2 = 0; X3 = 1; X4 = 1; X5 = 0;
curve(b[1] + b[2]*X2 + b[3]*X3 + b[4]*X4 + b[5]*X5 + b[6]*x + b[7]*x^2 + b[8]*x*X2 + b[9]*x*X3 + b[10]*x*X4 + b[11]*x*X5 + b[12]*x^2*X2 + b[13]*x^2*X3 + b[14]*x^2*X4 + b[15]*x^2*X5, add=TRUE, col=palette()[7])
X2 = 0; X3 = 0; X4 = 1; X5 = 1;
curve(b[1] + b[2]*X2 + b[3]*X3 + b[4]*X4 + b[5]*X5 + b[6]*x + b[7]*x^2 + b[8]*x*X2 + b[9]*x*X3 + b[10]*x*X4 + b[11]*x*X5 + b[12]*x^2*X2 + b[13]*x^2*X3 + b[14]*x^2*X4 + b[15]*x^2*X5, add=TRUE, col=palette()[5])
X2 = 1; X3 = 1; X4 = 1; X5 = 0;
curve(b[1] + b[2]*X2 + b[3]*X3 + b[4]*X4 + b[5]*X5 + b[6]*x + b[7]*x^2 + b[8]*x*X2 + b[9]*x*X3 + b[10]*x*X4 + b[11]*x*X5 + b[12]*x^2*X2 + b[13]*x^2*X3 + b[14]*x^2*X4 + b[15]*x^2*X5, add=TRUE, col=palette()[8])
X2 = 1; X3 = 0; X4 = 1; X5 = 1;
curve(b[1] + b[2]*X2 + b[3]*X3 + b[4]*X4 + b[5]*X5 + b[6]*x + b[7]*x^2 + b[8]*x*X2 + b[9]*x*X3 + b[10]*x*X4 + b[11]*x*X5 + b[12]*x^2*X2 + b[13]*x^2*X3 + b[14]*x^2*X4 + b[15]*x^2*X5, add=TRUE, col=palette()[6])
X2 = 1; X3 = 1; X4 = 0; X5 = 1;
curve(b[1] + b[2]*X2 + b[3]*X3 + b[4]*X4 + b[5]*X5 + b[6]*x + b[7]*x^2 + b[8]*x*X2 + b[9]*x*X3 + b[10]*x*X4 + b[11]*x*X5 + b[12]*x^2*X2 + b[13]*x^2*X3 + b[14]*x^2*X4 + b[15]*x^2*X5, add=TRUE, col=palette()[4])
X2 = 0; X3 = 1; X4 = 1; X5 = 1;
curve(b[1] + b[2]*X2 + b[3]*X3 + b[4]*X4 + b[5]*X5 + b[6]*x + b[7]*x^2 + b[8]*x*X2 + b[9]*x*X3 + b[10]*x*X4 + b[11]*x*X5 + b[12]*x^2*X2 + b[13]*x^2*X3 + b[14]*x^2*X4 + b[15]*x^2*X5, add=TRUE, col=palette()[7])
X2 = 1; X3 = 1; X4 = 1; X5 = 1;
curve(b[1] + b[2]*X2 + b[3]*X3 + b[4]*X4 + b[5]*X5 + b[6]*x + b[7]*x^2 + b[8]*x*X2 + b[9]*x*X3 + b[10]*x*X4 + b[11]*x*X5 + b[12]*x^2*X2 + b[13]*x^2*X3 + b[14]*x^2*X4 + b[15]*x^2*X5, add=TRUE, col=palette()[8])
Easy way to draw it…
ggplot(rdat, aes(x=X10, y=Y, col=interaction(X2,X3,X4,X5))) +
geom_point() +
geom_smooth(method="lm", se=F, formula=y ~ poly(x, 2, raw=TRUE) )